(Author: Zifan Wang)
Sys.setenv("plotly_username"="ziwang970")
Sys.setenv("plotly_api_key"="Rh542AcijT2qJ07JZsQY")
# read in dataset
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df <- read.csv("C:/Users/ziwan/Desktop/2018 Fall Courses/BST 260/Project/dataset/flight2017.csv")
# calculate mean departure delay minutes by state
state_delay <- df %>%
group_by(ORIGIN_STATE_ABR) %>%
summarize(mean_delay = mean(DEP_DELAY_NEW, na.rm = TRUE))
# give state boundaries white borders
l <- list(color = toRGB("white"), width = 2)
# specify some map projection/options
g <- list(
scope = 'usa',
projection = list(type = 'albers usa'),
showlakes = TRUE,
lakecolor = toRGB('white')
)
# make the plot
p <- plot_geo(state_delay, locationmode = 'USA-states') %>%
add_trace(
z = ~mean_delay, locations = ~ORIGIN_STATE_ABR,
color = ~mean_delay, colors = 'Purples'
) %>%
colorbar(title = "Departure delay in minutes") %>%
layout(
title = '2017 average departure delay (minutes) by states',
geo = g
)
p
# calculate mean departure delay minutes by city
city_delay <- df %>%
group_by(ORIGIN_CITY_NAME) %>%
summarize(mean_delay = mean(DEP_DELAY_NEW, na.rm = TRUE))
library(splitstackshape)
city_delay <- cSplit(city_delay, "ORIGIN_CITY_NAME", sep=",")
city_delay <- city_delay %>% mutate(name = ORIGIN_CITY_NAME_1)
# add the coordination of cities
coordinate <- read.csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')
city_delay <- city_delay %>% mutate(name = trimws(as.character(name)))
coordinate <- coordinate %>% mutate(name = trimws(as.character(name)))
merged_city_delay <- left_join(city_delay,coordinate, by='name')
merged_city_delay <- merged_city_delay %>%
group_by(name) %>%
summarize(mean_delay = mean(mean_delay, na.rm = TRUE), lat = mean(lat), lon = mean(lon))
# draw the plot by cities
merged_city_delay$q <- with(merged_city_delay, cut(mean_delay, quantile(mean_delay)))
levels(merged_city_delay$q) <- paste(c("1st", "2nd", "3rd", "4th", "5th"), "Quantile")
merged_city_delay$q <- as.ordered((merged_city_delay$q))
g <- list(
scope = 'usa',
projection = list(type = 'albers usa'),
showland = TRUE,
landcolor = toRGB("gray85"),
subunitwidth = 1,
countrywidth = 1,
subunitcolor = toRGB("white"),
countrycolor = toRGB("white")
)
p <- plot_geo(merged_city_delay, locationmode = 'USA-states', sizes = c(1, 250)) %>%
add_markers(
x = ~lon, y = ~lat, size = ~mean_delay, color = ~q, hoverinfo = "text",
text = ~paste(merged_city_delay$name, "<br />", merged_city_delay$mean_delay, "minutes")
) %>%
layout(title = '2017 average departure delay (minutes) by city', geo = g)
p
## Warning: Ignoring 102 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# group by flight routes and calculate mean departure delay
route_delay <- df %>%
group_by(ORIGIN_CITY_NAME, DEST_CITY_NAME) %>%
summarize(mean_delay = mean(DEP_DELAY_NEW, na.rm = TRUE))
library(splitstackshape)
route_delay <- cSplit(route_delay, "ORIGIN_CITY_NAME", sep=",")
route_delay <- cSplit(route_delay, "DEST_CITY_NAME", sep=",")
route_delay <- route_delay %>% mutate(name1 = ORIGIN_CITY_NAME_1, name2 = DEST_CITY_NAME_1)
# add the coordination of cities
coordinate <- read.csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')
route_delay <- route_delay %>% mutate(name1 = trimws(as.character(name1)), name2 = trimws(as.character(name2)))
coordinate <- coordinate %>% mutate(name = trimws(as.character(name)))
merged_1 <- left_join(route_delay,coordinate, by = c("name1" = "name")) %>%
rename(lat1 = lat, lon1 = lon, pop1 = pop) %>%
select(mean_delay, name1, name2, pop1, lat1, lon1)
merged_2 <- left_join(route_delay,coordinate, by = c("name2" = "name")) %>%
rename(lat2 = lat, lon2 = lon, pop2 = pop) %>%
select(mean_delay, name1, name2, pop2, lat2, lon2)
merged_route_delay <- left_join(merged_1, merged_2, by = c("name1", "name2")) %>%
rename(mean_delay = mean_delay.x) %>%
select(mean_delay, name1, name2, pop1, lat1, lon1, pop2, lat2, lon2)
merged_route_delay <- merged_route_delay %>% # get the mean population for each city
group_by(name1, name2) %>%
summarize(mean_delay = mean(mean_delay, na.rm = TRUE),
pop1 = mean(pop1, na.rm = TRUE), pop2 = mean(pop2, na.rm = TRUE),
lat1 = mean(lat1, na.rm = TRUE), lon1 = mean(lon1, na.rm = TRUE),
lat2 = mean(lat2, na.rm = TRUE), lon2 = mean(lon2, na.rm = TRUE))
# map projection
# restrict to >15, >30, >60, >90 minutes of delay
delay15 <-merged_route_delay %>%
filter(mean_delay >= 15)
delay30 <-merged_route_delay %>%
filter(mean_delay >= 30)
delay60 <-merged_route_delay %>%
filter(mean_delay >= 60)
delay90 <-merged_route_delay %>%
filter(mean_delay >= 90)
geo <- list(
scope = 'north america',
projection = list(type = 'azimuthal equal area'),
showland = TRUE,
landcolor = toRGB("gray95"),
countrycolor = toRGB("gray80")
)
p1 <- plot_geo(locationmode = 'USA-states', color = I("red")) %>%
add_markers(
data = delay15, x = ~lon1, y = ~lat1, text = ~name1,
size = ~pop1, hoverinfo = "text", alpha = 0.5
) %>%
add_markers(
data = delay15, x = ~lon2, y = ~lat2, text = ~name2,
size = ~pop2, hoverinfo = "text", alpha = 0.5
) %>%
add_segments(
x = ~lon1, xend = ~lon2,
y = ~lat1, yend = ~lat2,
alpha = 0.3, size = I(1), hoverinfo = "none"
) %>%
layout(
title = '2017 flight routes with >15 min delay',
geo = geo, showlegend = FALSE)
p2 <- plot_geo(locationmode = 'USA-states', color = I("red")) %>%
add_markers(
data = delay30, x = ~lon1, y = ~lat1, text = ~name1,
size = ~pop1, hoverinfo = "text", alpha = 0.5
) %>%
add_markers(
data = delay30, x = ~lon2, y = ~lat2, text = ~name2,
size = ~pop2, hoverinfo = "text", alpha = 0.5
) %>%
add_segments(
x = ~lon1, xend = ~lon2,
y = ~lat1, yend = ~lat2,
alpha = 0.3, size = I(1), hoverinfo = "none"
) %>%
layout(
title = '2017 flight routes with >30 min delay',
geo = geo, showlegend = FALSE)
p3 <- plot_geo(locationmode = 'USA-states', color = I("red")) %>%
add_markers(
data = delay60, x = ~lon1, y = ~lat1, text = ~name1,
size = ~pop1, hoverinfo = "text", alpha = 0.5
) %>%
add_markers(
data = delay60, x = ~lon2, y = ~lat2, text = ~name2,
size = ~pop2, hoverinfo = "text", alpha = 0.5
) %>%
add_segments(
x = ~lon1, xend = ~lon2,
y = ~lat1, yend = ~lat2,
alpha = 0.3, size = I(1), hoverinfo = "none"
) %>%
layout(
title = '2017 flight routes with >60 min delay',
geo = geo, showlegend = FALSE )
p4 <- plot_geo(locationmode = 'USA-states', color = I("red")) %>%
add_markers(
data = delay90, x = ~lon1, y = ~lat1, text = ~name1,
size = ~pop1, hoverinfo = "text", alpha = 0.5
) %>%
add_markers(
data = delay90, x = ~lon2, y = ~lat2, text = ~name2,
size = ~pop2, hoverinfo = "text", alpha = 0.5
) %>%
add_segments(
x = ~lon1, xend = ~lon2,
y = ~lat1, yend = ~lat2,
alpha = 0.3, size = I(1), hoverinfo = "none"
) %>%
layout(
title = '2017 flight routes with >90 min delay',
geo = geo, showlegend = FALSE )
p <- subplot(p1, p2, p3, p4, nrows = 2) %>%
layout(title = "2017 flight routes with different delay times",
xaxis = list(domain=list(x=c(0,0.5),y=c(0,0.5))),
scene = list(domain=list(x=c(0.5,1),y=c(0,0.5))),
xaxis2 = list(domain=list(x=c(0.5,1),y=c(0.5,1))),
annotations = list(
list(x = 0.2 , y = 1, text = ">15 mins", showarrow = F, xref='paper', yref='paper'),
list(x = 0.8 , y = 1, text = ">30 mins", showarrow = F, xref='paper', yref='paper'),
list(x = 0.2 , y = 0.5, text = ">60 mins", showarrow = F, xref='paper', yref='paper'),
list(x = 0.8 , y = 0.5, text = ">90 mins", showarrow = F, xref='paper', yref='paper'))
)
## Warning: Ignoring 262 observations
## Warning: Ignoring 244 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: Ignoring 50 observations
## Warning: Ignoring 48 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: Ignoring 13 observations
## Warning: Ignoring 22 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: Ignoring 2 observations
## Warning: Ignoring 9 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
p